---
title: "Supplementary tables"
author: "Lakhansing Pardeshi"
date: "`r Sys.Date()`"
format:
  html:
    embed-resources: true
    df-print: paged
knitr:
  opts_chunk:
    fig.height: 7
---
```{r}
#| label: setup
#| echo: false
#| warning: false
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(org.Pectobacterium.spp.pan.eg.db))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(openxlsx))
# NOTE: the global environment is deliberately NOT cleared here. The previous
# `rm(list = ls())` only matters when the chunk is run interactively, where it
# silently deletes the user's existing objects.

# Helper scripts: one fetched from GitHub and two project-local files.
source("https://raw.githubusercontent.com/lakhanp1/omics_utils/main/RScripts/utils.R")
source("scripts/utils/config_functions.R")
source("scripts/utils/homology_groups.R")

################################################################################
set.seed(124)

# Read the project configuration and pass it through prefix_config_paths()
# (presumably defined in one of the sourced scripts -- confirm) with dir = ".".
confs <- prefix_config_paths(
  conf = suppressWarnings(configr::read.config(file = "project_config.yaml")),
  dir = "."
)

# Name of the pangenome to report on and its configuration entry.
pangenome <- confs$data$pangenomes$pectobacterium.v2$name
panConf <- confs$data$pangenomes[[pangenome]]
panOrgDb <- org.Pectobacterium.spp.pan.eg.db

## write tables to excel file
wb <- openxlsx::createWorkbook()

# Running table number: used in the tab headings and sheet names, and
# incremented at the end of every table chunk.
table_counter <- 1
```
::: {.panel-tabset}
## Table S`r table_counter`
Metadata for all the genomes in the current pangenome
```{r}
#| echo: false
#| column: screen-inset-right
# Genome metadata: drop the "nodepath.*" columns and `Genome`, then bring the
# main descriptive columns to the front. The two virulence columns are renamed
# to blackleg_phenotype / blackleg_PCR; later chunks join on these names.
metadata <- suppressMessages(readr::read_csv(panConf$files$metadata)) %>%
  dplyr::select(-starts_with("nodepath."), -Genome) %>%
  dplyr::select(
    genomeId, source, SpeciesName, strain, geo_loc_country, collection_year, host,
    blackleg_phenotype = virulence, blackleg_PCR = virulence_pcr,
    AssemblyAccession, BioSampleAccn, everything()
  )

# Excel sheet for this table: title text first, data table starting at row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  # ": " separator added so this title follows the same
  # "## Table S<n>: <description>" pattern as the other sheets
  x = paste0(
    "## Table S", table_counter,
    ": Metadata for all the genomes in the current pangenome"
  )
)
openxlsx::writeDataTable(
  wb = wb, x = metadata, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)
# Keep the title, the header row and the first column visible while scrolling.
openxlsx::freezePane(wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2)

# Interactive HTML version of the same table.
DT::datatable(
  data = metadata,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the counter for the next tab heading / sheet.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Pangenome statistics: Homology group counts for the genus pangenome and
individual species pangenome
```{r}
#| echo: false
#| column: screen-inset-right
# Homology group statistics table with two count columns renamed for the
# report (count -> HGs_per_class, nHgs -> HGs_pangenome).
hg_stats <- suppressMessages(
  readr::read_tsv(confs$analysis$homology_groups$files$spp_group_stats)
) %>%
  dplyr::rename(
    HGs_per_class = count,
    HGs_pangenome = nHgs
  )

# Excel sheet for this table: title text first, data table starting at row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  # The fragments are concatenated without a separator, so the second one
  # must carry its own leading space (previously gave "pangenomeand").
  x = paste0(
    "## Table S", table_counter,
    ": Pangenome statistics: Homology group counts for the genus pangenome",
    " and individual species pangenome"
  )
)
openxlsx::writeDataTable(
  wb = wb, x = hg_stats, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)
# Keep the title, the header row and the first column visible while scrolling.
openxlsx::freezePane(wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2)

# Interactive HTML version of the same table.
DT::datatable(
  data = hg_stats,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the counter for the next tab heading / sheet.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
GO enrichment for core, accessory and unique homology groups with respect to
the pangenome background
```{r}
#| echo: false
#| column: screen-inset-right
# GO enrichment results: keep only the rows whose SpeciesName is "pangenome",
# then drop the SpeciesName and subpan_class columns.
go_table <- suppressMessages(
  readr::read_tsv(confs$analysis$homology_groups$files$spp_group_go)
)
go_table <- dplyr::filter(go_table, SpeciesName == "pangenome")
go_table <- dplyr::select(go_table, -SpeciesName, -subpan_class)

# Worksheet: title text first, the data table below it from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb = wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb,
  sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter, ": GO enrichment for core, accessory and unique homology groups",
    " with respect to the pangenome background"
  )
)
openxlsx::writeDataTable(
  wb = wb,
  sheet = currentSheet,
  x = go_table,
  startRow = 2,
  startCol = 1,
  withFilter = TRUE,
  keepNA = TRUE,
  na.string = "NA"
)
# Freeze everything above row 3 and left of column 2.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet,
  firstActiveRow = 3, firstActiveCol = 2
)

# Interactive table rendered in the HTML report.
DT::datatable(
  data = go_table,
  class = "compact hover",
  filter = "top",
  rownames = FALSE,
  selection = "none",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  )
)

# Next table gets the next number.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
geNomad prophage prediction result summary for 454 genomes
```{r}
#| echo: false
#| column: screen-inset-right
# Prophage predictions. `integrated` is "N" only when both start and end are
# missing, otherwise "Y". Strain and blackleg columns are added from `metadata`.
prophages <- suppressMessages(readr::read_tsv(confs$data$prophages$files$data))
prophages <- prophages %>%
  dplyr::mutate(
    # Equivalent to: if (is.na(start) & is.na(end)) "N" else "Y"
    integrated = dplyr::if_else(
      condition = !is.na(start) | !is.na(end), true = "Y", false = "N"
    )
  ) %>%
  dplyr::select(prophage_id, genomeId, everything()) %>%
  dplyr::relocate(integrated, .after = end) %>%
  dplyr::left_join(
    y = dplyr::select(metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR),
    by = "genomeId"
  ) %>%
  dplyr::relocate(strain, blackleg_phenotype, blackleg_PCR, .after = SpeciesName)

# Worksheet: title text first, the data table below it from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb = wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb,
  sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter, ": geNomad prophage prediction result summary for 454 genomes"
  )
)
openxlsx::writeDataTable(
  wb = wb,
  sheet = currentSheet,
  x = prophages,
  startRow = 2,
  startCol = 1,
  withFilter = TRUE,
  keepNA = TRUE,
  na.string = "NA"
)
# Freeze everything above row 3 and left of column 2.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet,
  firstActiveRow = 3, firstActiveCol = 2
)

# Interactive table rendered in the HTML report.
DT::datatable(
  data = prophages,
  class = "compact hover",
  filter = "top",
  rownames = FALSE,
  selection = "none",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  )
)

# Next table gets the next number.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Prophage statistics for individual genomes
```{r}
#| echo: false
#| column: screen-inset-right
# Per-genome prophage statistics: drop the "nodepath*" columns, add strain and
# blackleg columns from `metadata`, and place them right after SpeciesName.
phage_stats_genomes <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$summary$files$prophage_stats_genome)
)
phage_stats_genomes <- dplyr::select(
  phage_stats_genomes, -dplyr::starts_with("nodepath")
)
phage_stats_genomes <- dplyr::left_join(
  x = phage_stats_genomes,
  y = dplyr::select(metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR),
  by = "genomeId"
)
phage_stats_genomes <- dplyr::relocate(
  phage_stats_genomes,
  strain, blackleg_phenotype, blackleg_PCR,
  .after = SpeciesName
)

# Worksheet: title text first, the data table below it from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb = wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb,
  sheet = currentSheet,
  x = paste0("## Table S", table_counter, ": Prophage statistics for individual genomes")
)
openxlsx::writeDataTable(
  wb = wb,
  sheet = currentSheet,
  x = phage_stats_genomes,
  startRow = 2,
  startCol = 1,
  withFilter = TRUE,
  keepNA = TRUE,
  na.string = "NA"
)
# Freeze everything above row 3 and left of column 2.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet,
  firstActiveRow = 3, firstActiveCol = 2
)

# Interactive table rendered in the HTML report.
DT::datatable(
  data = phage_stats_genomes,
  class = "compact hover",
  filter = "top",
  rownames = FALSE,
  selection = "none",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  )
)

# Next table gets the next number.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Species-wise and pangenome-wide prophage statistics
```{r}
#| echo: false
#| column: screen-inset-right
# Species-level and pangenome-level prophage statistics. Columns are renamed
# to report-friendly labels; after `phageRatio.total` has been renamed, the
# remaining "phageRatio.*" columns are dropped.
phage_stats <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$summary$files$prophage_stats_species)
) %>%
  dplyr::rename(
    n_phages = n_vir_sp,
    phages_per_genome = mean_vir_per_g,
    HG_core = core,
    HG_accessory = accessory,
    HG_unique = unique,
    HG_total = total,
    HG_phage = phage_nHgs.total,
    phage_HG_ratio = phageRatio.total,
    phage_HG_core = phage_nHgs.core,
    phage_HG_accessory = phage_nHgs.accessory,
    phage_HG_unique = phage_nHgs.unique
  ) %>%
  dplyr::select(-dplyr::starts_with("phageRatio."))

# Excel sheet for this table: title text first, data table starting at row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  # title previously read "## Tabls S<n>" (typo)
  x = paste0(
    "## Table S", table_counter,
    ": Species wise and pangenome wide prophage statistics"
  )
)
openxlsx::writeDataTable(
  wb = wb, x = phage_stats, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)
# Keep the title, the header row and the first column visible while scrolling.
openxlsx::freezePane(wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2)

# Interactive HTML version of the same table.
DT::datatable(
  data = phage_stats,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the counter for the next tab heading / sheet.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Prophage clustering based on syntenic Jaccard index
```{r}
#| echo: false
#| column: screen-inset-right
# Prophage cluster table: drop the "nodepath.*" columns, add strain and
# blackleg columns from `metadata`, and place them right after SpeciesName.
phage_grps <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$files$clusters)
)
phage_grps <- dplyr::select(phage_grps, -starts_with("nodepath."))
phage_grps <- dplyr::left_join(
  x = phage_grps,
  y = dplyr::select(metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR),
  by = "genomeId"
)
phage_grps <- dplyr::relocate(
  phage_grps,
  strain, blackleg_phenotype, blackleg_PCR,
  .after = SpeciesName
)

# Worksheet: title text first, the data table below it from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb = wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb,
  sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter, ": Prophage clustering based on syntenic Jaccard index"
  )
)
openxlsx::writeDataTable(
  wb = wb,
  sheet = currentSheet,
  x = phage_grps,
  startRow = 2,
  startCol = 1,
  withFilter = TRUE,
  keepNA = TRUE,
  na.string = "NA"
)
# Freeze everything above row 3 and left of column 2.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet,
  firstActiveRow = 3, firstActiveCol = 2
)

# Interactive table rendered in the HTML report.
DT::datatable(
  data = phage_grps,
  class = "compact hover",
  filter = "top",
  rownames = FALSE,
  selection = "none",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  )
)

# Next table gets the next number.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Inhouse sample information
```{r}
#| echo: false
#| column: screen-inset-right
# Rows of `metadata` whose source is not "NCBI", reduced to the identifier
# and accession columns.
inhouse_samples <- metadata %>%
  dplyr::filter(source != "NCBI") %>%
  dplyr::select(genomeId, source, sampleId, BioSampleAccn, BioprojectAccn)

# Worksheet: title text first, the data table below it from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb = wb, sheetName = currentSheet)
openxlsx::writeData(
  wb = wb,
  sheet = currentSheet,
  x = paste0("## Table S", table_counter, ": Inhouse genome assemblies")
)
openxlsx::writeDataTable(
  wb = wb,
  sheet = currentSheet,
  x = inhouse_samples,
  startRow = 2,
  startCol = 1,
  withFilter = TRUE,
  keepNA = TRUE,
  na.string = "NA"
)
# Freeze everything above row 3 and left of column 2.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet,
  firstActiveRow = 3, firstActiveCol = 2
)

# Interactive table rendered in the HTML report.
DT::datatable(
  data = inhouse_samples,
  class = "compact hover",
  filter = "top",
  rownames = FALSE,
  selection = "none",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  )
)

# Next table gets the next number.
table_counter <- table_counter + 1
```
:::
```{r}
#| echo: false
# Save all tables to a single Excel file.
# The output directory is created first (no-op if it already exists) so that
# saving does not depend on the directory being present beforehand.
out_dir <- "reports/manuscript_prophages"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
openxlsx::saveWorkbook(
  wb = wb, overwrite = TRUE,
  file = file.path(out_dir, "ms_prophages_supplementary_data.xlsx")
)
```